Question 1 In class we computed the run expectancy matrix for the 2016 season. We used this quantity to assess the value of stolen bases and we computed the marginal break even stolen base percentage required to justify an attempt. Do the following:

fields = read_csv("fields.csv")
## Rows: 97 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Description, Header
## dbl (1): Field number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat2016 = read_csv("all2016.csv",
                   col_names = pull(fields, Header),
                   na = character())
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 190715 Columns: 97
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (35): GAME_ID, AWAY_TEAM_ID, PITCH_SEQ_TX, BAT_ID, BAT_HAND_CD, RESP_BAT...
## dbl (35): INN_CT, BAT_HOME_ID, OUTS_CT, BALLS_CT, STRIKES_CT, AWAY_SCORE_CT,...
## lgl (27): LEADOFF_FL, PH_FL, BAT_EVENT_FL, AB_FL, SH_FL, SF_FL, DP_FL, TP_FL...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dat2016_updated = dat2016 %>% 
  mutate(RUNS = AWAY_SCORE_CT + HOME_SCORE_CT, 
         HALF.INNING = paste(GAME_ID, INN_CT, BAT_HOME_ID), 
         RUNS.SCORED = (BAT_DEST_ID > 3) + (RUN1_DEST_ID > 3) + 
                 (RUN2_DEST_ID > 3) + (RUN3_DEST_ID > 3))

half_innings = dat2016_updated %>% 
  group_by(HALF.INNING) %>% 
  summarise(Outs.Inning = sum(EVENT_OUTS_CT), 
            Runs.Inning = sum(RUNS.SCORED), 
            Runs.Start = first(RUNS), 
            MAX.RUNS = Runs.Inning + Runs.Start)

dat2016_updated = dat2016_updated %>% 
  inner_join(half_innings, by = "HALF.INNING") %>% 
  mutate(RUNS.ROI = MAX.RUNS - RUNS)   

dat2016_updated = 
  dat2016_updated %>% mutate(BASES = paste(ifelse(BASE1_RUN_ID != "",1,0), 
                                 ifelse(BASE2_RUN_ID != "",1,0),
                                 ifelse(BASE3_RUN_ID != "",1,0), sep = ""), 
                     STATE = paste(BASES, OUTS_CT))
dat2016_updated = dat2016_updated %>% 
  mutate(NRUNNER1 = as.numeric(RUN1_DEST_ID == 1 | BAT_DEST_ID == 1), 
         NRUNNER2 = as.numeric(RUN1_DEST_ID == 2 | RUN2_DEST_ID == 2 | BAT_DEST_ID == 2), 
         NRUNNER3 = as.numeric(RUN1_DEST_ID == 3 | RUN2_DEST_ID == 3 | 
           RUN3_DEST_ID == 3 | BAT_DEST_ID == 3), 
         NOUTS = OUTS_CT + EVENT_OUTS_CT, 
         NEW.BASES = paste(NRUNNER1, NRUNNER2, NRUNNER3, sep = ""), 
         NEW.STATE = paste(NEW.BASES, NOUTS)) %>% 
  filter((STATE != NEW.STATE) | (RUNS.SCORED > 0)) %>% 
  filter(Outs.Inning == 3)
RUNS = dat2016_updated %>% 
  group_by(STATE) %>% 
  summarize(Mean = mean(RUNS.ROI)) %>% 
  mutate(Outs = substr(STATE, 5, 5)) %>% 
  arrange(Outs)

RUNS_out_2016 = matrix(round(RUNS$Mean, 2), 8, 3)
dimnames(RUNS_out_2016)[[1]] = c("000","001","010","011",
                             "100","101","110","111")
dimnames(RUNS_out_2016)[[2]] = c("0 outs", "1 out", "2 outs")
RUNS_out_2016
##     0 outs 1 out 2 outs
## 000   0.50  0.27   0.11
## 001   1.35  0.94   0.37
## 010   1.13  0.67   0.31
## 011   1.93  1.36   0.55
## 100   0.86  0.51   0.22
## 101   1.72  1.20   0.48
## 110   1.44  0.92   0.41
## 111   2.11  1.54   0.70
dat2016_SB = dat2016_updated%>%
  select(POS2_FLD_ID, EVENT_TX)%>%
  filter(grepl("SB", EVENT_TX))%>%
  group_by(POS2_FLD_ID) %>%
  summarise(total_SB = n())


dat2016_CB <- dat2016_updated %>%
  select(POS2_FLD_ID, RUN1_CS_FL, RUN2_CS_FL, RUN3_CS_FL, EVENT_TX) %>%  
  mutate(total_CS_pg = RUN1_CS_FL + RUN2_CS_FL + RUN3_CS_FL) %>%  
  select(POS2_FLD_ID, total_CS_pg, EVENT_TX)%>%
  group_by(POS2_FLD_ID) %>%  
  summarise(total_CS = sum(total_CS_pg))

dat2016_steal <- merge(x = dat2016_CB, y = dat2016_SB, by = "POS2_FLD_ID")
dat2016_BC = dat2016_steal%>%
  mutate(total_attempts = total_SB + total_CS)%>%
  filter(total_attempts >= 10)%>%
  mutate(caught_PCT = total_CS/total_attempts)%>%
  arrange(desc(caught_PCT))
dat2016_BC
top10_catchers = head(dat2016_BC, 10)
top10_catchers
worst10_catchers = tail(dat2016_BC, 10)
worst10_catchers
dat2016_steal <- dat2016_updated %>%
  left_join(RUNS, by = "STATE") %>%  
  rename(run_value = Mean)  

dat2016_steal <- dat2016_steal %>%
  mutate(
    SB_value = ifelse(EVENT_CD == 4, run_value, 0),  
    CS_value = ifelse(EVENT_CD == 6, run_value, 0)   
  )

dat2016_steal <- dat2016_steal %>%
  group_by(STATE) %>%
  summarise(
    total_SB = sum(SB_value),
    total_CS = sum(CS_value),
    total_attempts = total_SB + total_CS,  
    avg_run_value_SB = mean(SB_value),     
    avg_run_value_CS = mean(CS_value)      
  ) %>%
  filter(total_attempts >= 10) %>%  
  mutate(
    break_even_pct = avg_run_value_CS / (avg_run_value_SB - avg_run_value_CS)  
  )

dat2016_steal
min(dat2016_steal$break_even_pct)
## [1] 0.1265823
#it is best to attempt a SB when there is a runner on first and third with 1 out

Question 2 In the Simulation Notes we considered team specific transition probabilities for one base out state corresponding to the St. Louis Cardinals in 2016. Do the following:

cardinals_2016 = dat2016 %>% 
  filter(AWAY_TEAM_ID == "SLN" | grepl("SLN", GAME_ID))%>%
      mutate(RUNS  = AWAY_SCORE_CT + HOME_SCORE_CT, 
         HALF.INNING = paste(GAME_ID, INN_CT, BAT_HOME_ID),
         RUNS.SCORED = (BAT_DEST_ID > 3) + (RUN1_DEST_ID > 3) + 
           (RUN2_DEST_ID > 3) + (RUN3_DEST_ID > 3))

half_innings_SLN = cardinals_2016 %>% 
  group_by(HALF.INNING) %>% 
  summarize(Outs.Inning = sum(EVENT_OUTS_CT), 
            Runs.Inning = sum(RUNS.SCORED), 
            Runs.Start = first(RUNS),
            MAX.RUNS = Runs.Inning + Runs.Start)


cardinals_2016 = cardinals_2016 %>% 
  inner_join(half_innings_SLN, by = "HALF.INNING")%>% 
  mutate(RUNS.ROI = MAX.RUNS - RUNS)%>%
  mutate(BASES = paste(ifelse(BASE1_RUN_ID > '', 1, 0),
                       ifelse(BASE2_RUN_ID > '', 1, 0),
                       ifelse(BASE3_RUN_ID > '', 1, 0), sep = ""), 
         STATE = paste(BASES, OUTS_CT), 
         NRUNNER1 = as.numeric(RUN1_DEST_ID == 1 | BAT_DEST_ID == 1), 
         NRUNNER2 = as.numeric(RUN1_DEST_ID == 2 | RUN2_DEST_ID == 2 | BAT_DEST_ID == 2), 
         NRUNNER3 = as.numeric(RUN1_DEST_ID == 3 | RUN2_DEST_ID == 3 | 
                               RUN3_DEST_ID == 3 | BAT_DEST_ID == 3), 
         NOUTS = OUTS_CT + EVENT_OUTS_CT, 
         NEW.BASES = paste(NRUNNER1, NRUNNER2, NRUNNER3, sep = ""),
         NEW.STATE = paste(NEW.BASES, NOUTS))

cardinals_2016 = cardinals_2016 %>% 
  filter((STATE != NEW.STATE) | (RUNS.SCORED > 0))
cardinals_2016C = cardinals_2016 %>% 
  filter(Outs.Inning == 3, BAT_EVENT_FL == TRUE)
cardinals_2016C = cardinals_2016C %>% 
  mutate(NEW.STATE = gsub("[0-1]{3} 3", "3", NEW.STATE))

T_matrix_2016 = cardinals_2016C %>% 
  select(STATE, NEW.STATE) %>% 
  table()
T_matrix_2016
##        NEW.STATE
## STATE   000 0 000 1 000 2 001 0 001 1 001 2 010 0 010 1 010 2 011 0 011 1 011 2
##   000 0   109  1999     0    19     0     0   184     0     0     0     0     0
##   000 1     0    62  1456     0    12     0     0    96     0     0     0     0
##   000 2     0     0    59     0     0     3     0     0    60     0     0     0
##   001 0     0     5     0     0    14     0     2     1     0     0     0     0
##   001 1     0     2    28     0     0    42     0     9     2     0     0     0
##   001 2     0     0     6     0     0     1     0     0     8     0     0     0
##   010 0    10     1     2     1    60     0    16    95     0     1     0     0
##   010 1     0     9     0     0     2    74     0    15   142     0     1     0
##   010 2     0     0    12     0     0     1     0     0    24     0     0     0
##   011 0     1     0     0     0     7     0     0     4     0     0    11     0
##   011 1     0     4     0     0     0    11     0     5    11     0     0    21
##   011 2     0     0     4     0     0     0     0     0     6     0     0     0
##   100 0    14     2    88     4     1     0    14    69     0    17     0     0
##   100 1     0    20     1     0     8     3     0    10    55     0    31     0
##   100 2     0     0    20     0     0     5     0     0    11     0     0    27
##   101 0     3     0    11     1     0     0     0     3     0     1     1     0
##   101 1     0     9     0     0     2     0     0     1     7     0     7     5
##   101 2     0     0     5     0     0     0     0     0     7     0     0     3
##   110 0     7     0     0     0     0    16     3     1     2     5    22     0
##   110 1     0     5     1     0     2     0     0     6     1     0    12    19
##   110 2     0     0    11     0     0     1     0     0    10     0     0     8
##   111 0     1     0     0     0     0     1     0     0     0     2     3     1
##   111 1     0     4     0     0     1     0     0     0     0     0     4     9
##   111 2     0     0     3     0     0     0     0     0     0     0     0     9
##        NEW.STATE
## STATE   100 0 100 1 100 2 101 0 101 1 101 2 110 0 110 1 110 2 111 0 111 1 111 2
##   000 0   720     0     0     0     0     0     0     0     0     0     0     0
##   000 1     0   508     0     0     0     0     0     0     0     0     0     0
##   000 2     0     0   398     0     0     0     0     0     0     0     0     0
##   001 0     6     1     0     6     0     0     0     0     0     0     0     0
##   001 1     0    21     2     0    21     0     0     0     0     0     0     0
##   001 2     0     0    25     0     0    30     0     0     0     0     0     0
##   010 0    13     4     0    21     0     0    20     0     0     0     0     0
##   010 1     0    21     5     0    27     0     0    41     0     0     0     0
##   010 2     0     0    30     0     0    21     0     0    67     0     0     0
##   011 0     2     0     0     3     0     0     1     0     0     3     0     0
##   011 1     0     5     0     0    13     3     0     2     2     0    24     0
##   011 2     0     0    12     0     0     3     0     0     0     0     0    34
##   100 0     0   329     0    29     0     0   130     0     0     0     0     0
##   100 1     0     0   405     0    40     0     0   153     0     0     0     0
##   100 2     0     0     2     0     0    43     0     0   159     0     0     0
##   101 0     0     9     0     2    16     0     7     1     0     6     0     0
##   101 1     0     0    24     0     8    37     0    15     3     0    15     0
##   101 2     0     0     0     0     0    11     0     0    17     0     0    16
##   110 0     0     0     1     2    17     0     6    53     0    25     0     0
##   110 1     0     0     1     0     8    27     0    14   102     0    41     0
##   110 2     0     0     1     0     0    11     0     0    15     0     0    41
##   111 0     0     0     0     2     4     0     4     3     0     8    13     0
##   111 1     0     0     1     0     2    13     0     4     9     0    20    26
##   111 2     0     0     0     0     0     6     0     0     6     0     0    12
##        NEW.STATE
## STATE      3
##   000 0    0
##   000 1    0
##   000 2 1215
##   001 0    0
##   001 1    2
##   001 2  104
##   010 0    0
##   010 1    0
##   010 2  253
##   011 0    0
##   011 1    2
##   011 2   77
##   100 0    0
##   100 1  110
##   100 2  552
##   101 0    0
##   101 1   16
##   101 2  122
##   110 0    0
##   110 1   35
##   110 2  266
##   111 0    0
##   111 1   18
##   111 2   93
P_matrix_2016 = prop.table(T_matrix_2016, 1)
P_matrix_2016 = rbind(P_matrix_2016, c(rep(0, 24), 1))
round(P_matrix_2016,2)
##       000 0 000 1 000 2 001 0 001 1 001 2 010 0 010 1 010 2 011 0 011 1 011 2
## 000 0  0.04  0.66  0.00  0.01  0.00  0.00  0.06  0.00  0.00  0.00  0.00  0.00
## 000 1  0.00  0.03  0.68  0.00  0.01  0.00  0.00  0.04  0.00  0.00  0.00  0.00
## 000 2  0.00  0.00  0.03  0.00  0.00  0.00  0.00  0.00  0.03  0.00  0.00  0.00
## 001 0  0.00  0.14  0.00  0.00  0.40  0.00  0.06  0.03  0.00  0.00  0.00  0.00
## 001 1  0.00  0.02  0.22  0.00  0.00  0.33  0.00  0.07  0.02  0.00  0.00  0.00
## 001 2  0.00  0.00  0.03  0.00  0.00  0.01  0.00  0.00  0.05  0.00  0.00  0.00
## 010 0  0.04  0.00  0.01  0.00  0.25  0.00  0.07  0.39  0.00  0.00  0.00  0.00
## 010 1  0.00  0.03  0.00  0.00  0.01  0.22  0.00  0.04  0.42  0.00  0.00  0.00
## 010 2  0.00  0.00  0.03  0.00  0.00  0.00  0.00  0.00  0.06  0.00  0.00  0.00
## 011 0  0.03  0.00  0.00  0.00  0.22  0.00  0.00  0.12  0.00  0.00  0.34  0.00
## 011 1  0.00  0.04  0.00  0.00  0.00  0.11  0.00  0.05  0.11  0.00  0.00  0.20
## 011 2  0.00  0.00  0.03  0.00  0.00  0.00  0.00  0.00  0.04  0.00  0.00  0.00
## 100 0  0.02  0.00  0.13  0.01  0.00  0.00  0.02  0.10  0.00  0.02  0.00  0.00
## 100 1  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.01  0.07  0.00  0.04  0.00
## 100 2  0.00  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.01  0.00  0.00  0.03
## 101 0  0.05  0.00  0.18  0.02  0.00  0.00  0.00  0.05  0.00  0.02  0.02  0.00
## 101 1  0.00  0.06  0.00  0.00  0.01  0.00  0.00  0.01  0.05  0.00  0.05  0.03
## 101 2  0.00  0.00  0.03  0.00  0.00  0.00  0.00  0.00  0.04  0.00  0.00  0.02
## 110 0  0.04  0.00  0.00  0.00  0.00  0.10  0.02  0.01  0.01  0.03  0.14  0.00
## 110 1  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.02  0.00  0.00  0.04  0.07
## 110 2  0.00  0.00  0.03  0.00  0.00  0.00  0.00  0.00  0.03  0.00  0.00  0.02
## 111 0  0.02  0.00  0.00  0.00  0.00  0.02  0.00  0.00  0.00  0.05  0.07  0.02
## 111 1  0.00  0.04  0.00  0.00  0.01  0.00  0.00  0.00  0.00  0.00  0.04  0.08
## 111 2  0.00  0.00  0.02  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.07
##        0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
##       100 0 100 1 100 2 101 0 101 1 101 2 110 0 110 1 110 2 111 0 111 1 111 2
## 000 0  0.24  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 000 1  0.00  0.24  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 000 2  0.00  0.00  0.23  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 001 0  0.17  0.03  0.00  0.17  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 001 1  0.00  0.16  0.02  0.00  0.16  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 001 2  0.00  0.00  0.14  0.00  0.00  0.17  0.00  0.00  0.00  0.00  0.00  0.00
## 010 0  0.05  0.02  0.00  0.09  0.00  0.00  0.08  0.00  0.00  0.00  0.00  0.00
## 010 1  0.00  0.06  0.01  0.00  0.08  0.00  0.00  0.12  0.00  0.00  0.00  0.00
## 010 2  0.00  0.00  0.07  0.00  0.00  0.05  0.00  0.00  0.16  0.00  0.00  0.00
## 011 0  0.06  0.00  0.00  0.09  0.00  0.00  0.03  0.00  0.00  0.09  0.00  0.00
## 011 1  0.00  0.05  0.00  0.00  0.13  0.03  0.00  0.02  0.02  0.00  0.23  0.00
## 011 2  0.00  0.00  0.09  0.00  0.00  0.02  0.00  0.00  0.00  0.00  0.00  0.25
## 100 0  0.00  0.47  0.00  0.04  0.00  0.00  0.19  0.00  0.00  0.00  0.00  0.00
## 100 1  0.00  0.00  0.48  0.00  0.05  0.00  0.00  0.18  0.00  0.00  0.00  0.00
## 100 2  0.00  0.00  0.00  0.00  0.00  0.05  0.00  0.00  0.19  0.00  0.00  0.00
## 101 0  0.00  0.15  0.00  0.03  0.26  0.00  0.11  0.02  0.00  0.10  0.00  0.00
## 101 1  0.00  0.00  0.16  0.00  0.05  0.25  0.00  0.10  0.02  0.00  0.10  0.00
## 101 2  0.00  0.00  0.00  0.00  0.00  0.06  0.00  0.00  0.09  0.00  0.00  0.09
## 110 0  0.00  0.00  0.01  0.01  0.11  0.00  0.04  0.33  0.00  0.16  0.00  0.00
## 110 1  0.00  0.00  0.00  0.00  0.03  0.10  0.00  0.05  0.37  0.00  0.15  0.00
## 110 2  0.00  0.00  0.00  0.00  0.00  0.03  0.00  0.00  0.04  0.00  0.00  0.11
## 111 0  0.00  0.00  0.00  0.05  0.10  0.00  0.10  0.07  0.00  0.19  0.31  0.00
## 111 1  0.00  0.00  0.01  0.00  0.02  0.12  0.00  0.04  0.08  0.00  0.18  0.23
## 111 2  0.00  0.00  0.00  0.00  0.00  0.05  0.00  0.00  0.05  0.00  0.00  0.09
##        0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
##          3
## 000 0 0.00
## 000 1 0.00
## 000 2 0.70
## 001 0 0.00
## 001 1 0.02
## 001 2 0.60
## 010 0 0.00
## 010 1 0.00
## 010 2 0.62
## 011 0 0.00
## 011 1 0.02
## 011 2 0.57
## 100 0 0.00
## 100 1 0.13
## 100 2 0.67
## 101 0 0.00
## 101 1 0.11
## 101 2 0.67
## 110 0 0.00
## 110 1 0.13
## 110 2 0.73
## 111 0 0.00
## 111 1 0.16
## 111 2 0.72
##       1.00
#creating the transition probability matrix
cardinals_2016C = cardinals_2016C %>%
  mutate(HOME_TEAM_ID = str_sub(GAME_ID, 1, 3),
         BATTING.TEAM = ifelse(BAT_HOME_ID == 0,
                               AWAY_TEAM_ID, HOME_TEAM_ID))

Team.T.S = cardinals_2016C %>% 
  group_by(BATTING.TEAM, STATE, NEW.STATE) %>% 
  tally()

SLN.Trans = Team.T.S %>% filter(BATTING.TEAM == "SLN") %>% 
  mutate(p = n / sum(n))

All.Trans = cardinals_2016C %>% 
  group_by(NEW.STATE) %>% 
  tally() %>% 
  mutate(p = n / sum(n))

SLN.Trans %>% inner_join(All.Trans, by = "NEW.STATE") %>% 
  mutate(p.EST = n.x / (1274 + n.x) * p.x + 1274 / (1274 + n.x) * p.y) %>% 
  mutate(p.EST = p.EST / sum(p.EST)) %>%
  select(BATTING.TEAM, NEW.STATE, p.x, p.y, p.EST)
## Adding missing grouping variables: `STATE`
SLN.Trans
#normalized outcomes for al base states
P_matrix_3_2016 = P_matrix_2016 %*% P_matrix_2016 %*% P_matrix_2016

P_matrix_3_2016 %>% as_tibble(rownames = "STATE") %>% 
  filter(STATE == "000 0") %>%
  gather(key = "NEW.STATE", value = "Prob", -STATE) 
#movement through the base out states after 3 PAs to start a half inning for SLN
dat2016 = dat2016 %>% 
  mutate(RUNS  = AWAY_SCORE_CT + HOME_SCORE_CT, 
         HALF.INNING = paste(GAME_ID, INN_CT, BAT_HOME_ID),
         RUNS.SCORED = (BAT_DEST_ID > 3) + (RUN1_DEST_ID > 3) + 
           (RUN2_DEST_ID > 3) + (RUN3_DEST_ID > 3))

half_innings = dat2016 %>% 
  group_by(HALF.INNING) %>% 
  summarize(Outs.Inning = sum(EVENT_OUTS_CT), 
            Runs.Inning = sum(RUNS.SCORED), 
            Runs.Start = first(RUNS),
            MAX.RUNS = Runs.Inning + Runs.Start)

dat2016 = dat2016 %>% 
  inner_join(half_innings, by = "HALF.INNING") %>% 
  mutate(BASES = paste(ifelse(BASE1_RUN_ID > '', 1, 0),
                       ifelse(BASE2_RUN_ID > '', 1, 0),
                       ifelse(BASE3_RUN_ID > '', 1, 0), sep = ""), 
         STATE = paste(BASES, OUTS_CT), 
         NRUNNER1 = as.numeric(RUN1_DEST_ID == 1 | BAT_DEST_ID == 1), 
         NRUNNER2 = as.numeric(RUN1_DEST_ID == 2 | RUN2_DEST_ID == 2 | BAT_DEST_ID == 2), 
         NRUNNER3 = as.numeric(RUN1_DEST_ID == 3 | RUN2_DEST_ID == 3 | 
                               RUN3_DEST_ID == 3 | BAT_DEST_ID == 3), 
         NOUTS = OUTS_CT + EVENT_OUTS_CT, 
         NEW.BASES = paste(NRUNNER1, NRUNNER2, NRUNNER3, sep = ""),
         NEW.STATE = paste(NEW.BASES, NOUTS))


dat2016 = dat2016 %>% filter((STATE != NEW.STATE) | (RUNS.SCORED > 0))
dat2016C = dat2016 %>% filter(Outs.Inning == 3, BAT_EVENT_FL == TRUE)
dat2016C = dat2016C %>% mutate(NEW.STATE = gsub("[0-1]{3} 3", "3", NEW.STATE))

T_matrix = dat2016C %>% 
  select(STATE, NEW.STATE) %>% 
  table()

P_matrix = prop.table(T_matrix, 1)

P_matrix = rbind(P_matrix, c(rep(0, 24), 1))

P_matrix_3 = P_matrix %*% P_matrix %*% P_matrix

P_matrix_3 %>% as_tibble(rownames = "STATE") %>% 
  filter(STATE == "000 0") %>% 
  gather(key = "NEW.STATE", value = "Prob", -STATE)
difference_matrix <- P_matrix_3 - P_matrix_3_2016
difference_matrix
##               000 0         000 1         000 2        001 0         001 1
## 000 0 -6.490883e-04  0.0028539445 -3.558890e-03 0.0001586831 -1.311976e-03
## 000 1  0.000000e+00  0.0002003984  1.539482e-03 0.0000000000 -2.624754e-05
## 000 2  0.000000e+00  0.0000000000 -7.965831e-05 0.0000000000  0.000000e+00
## 001 0 -1.242886e-03 -0.0038779531  1.392255e-02 0.0001847218 -1.767864e-03
## 001 1  0.000000e+00 -0.0003239156 -2.569653e-03 0.0000000000 -7.705327e-05
## 001 2  0.000000e+00  0.0000000000 -7.124370e-04 0.0000000000  0.000000e+00
## 010 0 -2.087094e-04 -0.0019641133 -7.208625e-03 0.0001802314 -7.941749e-04
## 010 1  0.000000e+00 -0.0003742649 -1.407249e-03 0.0000000000 -5.502009e-05
## 010 2  0.000000e+00  0.0000000000 -3.828949e-04 0.0000000000  0.000000e+00
## 011 0  1.941019e-05 -0.0032607541  1.318753e-03 0.0003207199 -4.728886e-04
## 011 1  0.000000e+00 -0.0015756483 -6.924897e-03 0.0000000000 -2.837590e-04
## 011 2  0.000000e+00  0.0000000000 -9.380237e-04 0.0000000000  0.000000e+00
## 100 0  2.593476e-04 -0.0002414708  5.930455e-03 0.0002457131  6.530559e-04
## 100 1  0.000000e+00 -0.0006118447 -1.683291e-04 0.0000000000 -6.954366e-05
## 100 2  0.000000e+00  0.0000000000 -1.496407e-04 0.0000000000  0.000000e+00
## 101 0 -2.215476e-04 -0.0004161962 -1.062944e-02 0.0001921894  2.987031e-04
## 101 1  0.000000e+00 -0.0012211192 -3.218665e-03 0.0000000000 -2.404122e-04
## 101 2  0.000000e+00  0.0000000000 -2.634892e-04 0.0000000000  0.000000e+00
## 110 0 -4.872115e-04 -0.0023217929 -6.677461e-03 0.0001535850  5.308873e-06
## 110 1  0.000000e+00 -0.0007797843 -1.451056e-03 0.0000000000 -1.115419e-04
## 110 2  0.000000e+00  0.0000000000  7.249764e-05 0.0000000000  0.000000e+00
## 111 0 -1.606827e-03 -0.0065666678 -2.290580e-03 0.0001064977 -9.199476e-04
## 111 1  0.000000e+00 -0.0007346100 -1.830191e-03 0.0000000000 -1.074248e-04
## 111 2  0.000000e+00  0.0000000000 -1.666394e-04 0.0000000000  0.000000e+00
##        0.000000e+00  0.0000000000  0.000000e+00 0.0000000000  0.000000e+00
##               001 2         010 0         010 1         010 2         011 0
## 000 0 -0.0020138430 -3.449968e-04 -1.599267e-03  6.104631e-03  0.0006899824
## 000 1  0.0001695892  0.000000e+00 -3.334018e-04  3.472838e-03  0.0000000000
## 000 2  0.0003185696  0.000000e+00  0.000000e+00 -1.179499e-04  0.0000000000
## 001 0 -0.0025981721 -4.269363e-04  1.149631e-03  8.576322e-05 -0.0000402902
## 001 1  0.0005579519  0.000000e+00 -5.140223e-04  4.950051e-05  0.0000000000
## 001 2  0.0004012017  0.000000e+00  0.000000e+00 -6.737570e-04  0.0000000000
## 010 0 -0.0002707163 -5.217322e-05 -1.913926e-03 -4.820386e-04  0.0003219908
## 010 1  0.0007205461  0.000000e+00 -8.565747e-05 -1.472296e-03  0.0000000000
## 010 2  0.0004383014  0.000000e+00  0.000000e+00 -6.159874e-05  0.0000000000
## 011 0  0.0067769582  6.431112e-04  3.550172e-03  1.990219e-04  0.0001119701
## 011 1  0.0012957794  0.000000e+00 -5.344316e-04  1.669659e-04  0.0000000000
## 011 2  0.0004334183  0.000000e+00  0.000000e+00 -6.534955e-04  0.0000000000
## 100 0  0.0024909770  5.872191e-04 -1.084279e-03  1.537984e-03  0.0001013319
## 100 1  0.0009043810  0.000000e+00  2.559653e-04 -1.510680e-03  0.0000000000
## 100 2  0.0004691194  0.000000e+00  0.000000e+00  4.457830e-04  0.0000000000
## 101 0  0.0060632057  5.354874e-04 -1.008626e-03  5.092390e-03 -0.0003929411
## 101 1  0.0010178500  0.000000e+00 -1.071778e-06 -1.821118e-03  0.0000000000
## 101 2  0.0004481059  0.000000e+00  0.000000e+00  1.696753e-04  0.0000000000
## 110 0  0.0009257600  5.261766e-04 -3.172333e-04 -3.787805e-03 -0.0005532662
## 110 1  0.0021193742  0.000000e+00  3.247403e-04  1.756478e-03  0.0000000000
## 110 2  0.0004352357  0.000000e+00  0.000000e+00  4.730328e-04  0.0000000000
## 111 0  0.0010478472  2.814284e-04 -8.678322e-04 -2.928835e-04 -0.0017210202
## 111 1  0.0021522118  0.000000e+00  3.298029e-04  2.016535e-03  0.0000000000
## 111 2  0.0004248213  0.000000e+00  0.000000e+00  4.959913e-04  0.0000000000
##        0.0000000000  0.000000e+00  0.000000e+00  0.000000e+00  0.0000000000
##               011 1         011 2         100 0         100 1         100 2
## 000 0 -0.0006851731  0.0000000000  1.464346e-04 -0.0016947125 -2.051214e-03
## 000 1 -0.0006053198 -0.0021847932  0.000000e+00  0.0002700964 -4.115444e-04
## 000 2  0.0000000000  0.0002190896  0.000000e+00  0.0000000000  1.038605e-04
## 001 0 -0.0049174571 -0.0019118819 -1.307736e-03  0.0081764067  3.152791e-03
## 001 1 -0.0007791153 -0.0015476035  0.000000e+00 -0.0008853755 -1.373896e-03
## 001 2  0.0000000000 -0.0006957471  0.000000e+00  0.0000000000 -7.428729e-04
## 010 0 -0.0007490181  0.0026948397 -6.911117e-04 -0.0023152338 -3.276028e-03
## 010 1 -0.0004471178 -0.0009749485  0.000000e+00 -0.0001769207 -1.454425e-03
## 010 2  0.0000000000 -0.0009121272  0.000000e+00  0.0000000000 -5.213330e-04
## 011 0 -0.0043236662 -0.0025014338  3.619349e-05  0.0058581291 -2.076580e-02
## 011 1 -0.0013443764 -0.0073251746  0.000000e+00 -0.0020791185 -5.224700e-03
## 011 2  0.0000000000 -0.0012291923  0.000000e+00  0.0000000000 -1.432486e-03
## 100 0 -0.0014650328  0.0037273306 -3.064449e-04  0.0045374922 -2.354861e-03
## 100 1 -0.0003089357  0.0002292013  0.000000e+00  0.0001438972 -1.790505e-04
## 100 2  0.0000000000 -0.0013773365  0.000000e+00  0.0000000000 -2.537211e-04
## 101 0 -0.0019829025  0.0041797693 -1.198131e-04 -0.0035211422  2.915029e-03
## 101 1 -0.0008817698 -0.0026167256  0.000000e+00 -0.0007642824 -9.067362e-03
## 101 2  0.0000000000 -0.0010928036  0.000000e+00  0.0000000000 -4.434904e-04
## 110 0 -0.0033293714  0.0001321219 -4.660643e-05 -0.0030267308 -1.318469e-04
## 110 1 -0.0004571530 -0.0038694770  0.000000e+00 -0.0002975768  2.501736e-04
## 110 2  0.0000000000 -0.0007134870  0.000000e+00  0.0000000000 -2.963919e-05
## 111 0 -0.0086558794 -0.0070885080 -1.227660e-03 -0.0010169386 -5.811620e-03
## 111 1 -0.0004105098 -0.0039916800  0.000000e+00 -0.0003807890 -2.685046e-03
## 111 2  0.0000000000 -0.0015233779  0.000000e+00  0.0000000000 -1.865870e-04
##        0.0000000000  0.0000000000  0.000000e+00  0.0000000000  0.000000e+00
##               101 0         101 1         101 2         110 0         110 1
## 000 0  3.120129e-04 -0.0036747452  0.000000e+00 -5.973335e-05  0.0060535892
## 000 1  0.000000e+00 -0.0008907086 -3.813562e-03  0.000000e+00  0.0006373672
## 000 2  0.000000e+00  0.0000000000  5.427031e-04  0.000000e+00  0.0000000000
## 001 0 -1.931546e-04 -0.0074859964 -5.850233e-03 -1.757176e-04  0.0023869147
## 001 1  0.000000e+00 -0.0013103804 -6.357459e-03  0.000000e+00  0.0008830902
## 001 2  0.000000e+00  0.0000000000 -3.474405e-04  0.000000e+00  0.0000000000
## 010 0  8.955785e-05 -0.0029841589 -6.644630e-03 -1.092407e-03  0.0082479119
## 010 1  0.000000e+00 -0.0009738350 -3.176836e-03  0.000000e+00  0.0010760664
## 010 2  0.000000e+00  0.0000000000 -9.527772e-05  0.000000e+00  0.0000000000
## 011 0  4.330042e-04 -0.0010885192 -1.246733e-02  1.135006e-04  0.0025007576
## 011 1  0.000000e+00 -0.0019200064 -6.192020e-03  0.000000e+00  0.0001240776
## 011 2  0.000000e+00  0.0000000000 -4.800651e-04  0.000000e+00  0.0000000000
## 100 0  3.456629e-04 -0.0022229127 -6.513656e-03 -5.389467e-04  0.0029940183
## 100 1  0.000000e+00 -0.0007151325  2.059557e-04  0.000000e+00  0.0012143710
## 100 2  0.000000e+00  0.0000000000 -5.942301e-05  0.000000e+00  0.0000000000
## 101 0 -3.654259e-04 -0.0007395958 -1.276577e-03 -2.532428e-03  0.0048204336
## 101 1  0.000000e+00 -0.0013291282 -1.812253e-03  0.000000e+00 -0.0004433139
## 101 2  0.000000e+00  0.0000000000  3.579703e-05  0.000000e+00  0.0000000000
## 110 0 -4.953929e-04 -0.0039081365 -1.124816e-03 -2.497909e-03  0.0018606519
## 110 1  0.000000e+00 -0.0004182478 -1.616466e-03  0.000000e+00  0.0014375262
## 110 2  0.000000e+00  0.0000000000  4.894518e-04  0.000000e+00  0.0000000000
## 111 0 -1.819543e-03 -0.0098615239 -6.682579e-03 -4.611808e-03 -0.0040988436
## 111 1  0.000000e+00 -0.0004066406  4.711606e-04  0.000000e+00  0.0011198672
## 111 2  0.000000e+00  0.0000000000 -5.960220e-05  0.000000e+00  0.0000000000
##        0.000000e+00  0.0000000000  0.000000e+00  0.000000e+00  0.0000000000
##               110 2         111 0         111 1         111 2             3
## 000 0  0.000000e+00  1.390524e-03  0.0000000000  0.000000e+00 -6.616346e-05
## 000 1  2.067284e-03  0.000000e+00  0.0004043759  0.000000e+00 -4.958547e-04
## 000 2  7.089894e-04  0.000000e+00  0.0000000000 -3.504522e-04 -1.345152e-03
## 001 0 -7.496510e-03 -2.348397e-03 -0.0075933156  0.000000e+00  2.017572e-02
## 001 1  1.310670e-03  0.000000e+00 -0.0006238652 -7.613575e-04  1.432248e-02
## 001 2 -1.530387e-03  0.000000e+00  0.0000000000 -1.198623e-03  5.500064e-03
## 010 0  3.179466e-03  1.703918e-03  0.0027003490  0.000000e+00  1.152880e-02
## 010 1 -2.123698e-03  0.000000e+00 -0.0009983208  1.924167e-03  9.999810e-03
## 010 2 -4.940757e-04  0.000000e+00  0.0000000000 -3.050872e-04  2.334093e-03
## 011 0 -6.796746e-03  3.147004e-06 -0.0041842786  1.996316e-02  1.401340e-02
## 011 1 -2.467419e-03  0.000000e+00 -0.0054699094 -3.939340e-03  4.369398e-02
## 011 2 -3.944070e-04  0.000000e+00  0.0000000000 -4.714773e-03  9.409024e-03
## 100 0 -6.338073e-04  1.263922e-03  0.0054519918  0.000000e+00 -1.476509e-02
## 100 1  8.618178e-04  0.000000e+00 -0.0014036189  2.577638e-03 -1.426093e-03
## 100 2 -4.140273e-05  0.000000e+00  0.0000000000  3.558052e-04  6.108164e-04
## 101 0  7.403726e-03 -4.834188e-04  0.0046838672  5.344293e-03 -1.783904e-02
## 101 1 -5.653038e-04  0.000000e+00 -0.0027393592  2.518012e-03  2.318602e-02
## 101 2 -1.002382e-04  0.000000e+00  0.0000000000 -6.266012e-04  1.873044e-03
## 110 0 -1.402781e-04 -1.825230e-03 -0.0025531902  1.402105e-02  1.559963e-02
## 110 1  2.169879e-04  0.000000e+00 -0.0021856132  4.135183e-03  9.464523e-04
## 110 2  8.166680e-04  0.000000e+00  0.0000000000 -4.619966e-05 -1.497560e-03
## 111 0 -9.695221e-05 -7.309703e-03 -0.0179126056  1.216247e-02  7.686168e-02
## 111 1  2.732448e-03  0.000000e+00 -0.0019544966  1.170702e-03  2.508661e-03
## 111 2 -7.675090e-06  0.000000e+00  0.0000000000 -5.851193e-04  1.608188e-03
##        0.000000e+00  0.000000e+00  0.0000000000  0.000000e+00  0.000000e+00
mean(difference_matrix)
## [1] -5.191025e-19
#because the difference between the league's matrix and SLN's matrix is negative that means the SLN' matrix was greater. Therefore, the 2016 Cardinals were more likely to end the half-inning after 3 PAs compared to league
count_runners_out = function(s){
  s %>% str_split("") %>% pluck(1) %>% as.numeric() %>% sum(na.rm = TRUE)
}
  
runners_out = sapply(row.names(T_matrix_2016), 
                      count_runners_out)[-25]

R = outer(runners_out + 1, runners_out, FUN = "-")
names(R) = names(T_matrix_2016)[-25]
R = cbind(R, rep(0,24))

set.seed(430)
simulate_half_inning = function(P, R, start = 1){
  s = start
  path = NULL
  runs = 0
  while(s < 25){
    s.new = sample(1:25, size = 1, prob = P[s, ])
    path = c(path, s.new)
    runs = runs + R[s, s.new]
    s = s.new
  }
  runs
}

B = 1e5
system.time({
  RUNS = replicate(B, simulate_half_inning(P_matrix_2016, R))
})
##    user  system elapsed 
##    7.91    0.11    8.03
RUNS.j = function(j){
  mean(replicate(B, simulate_half_inning(P_matrix_2016, R, j)))
}


#doMC is not compatible with my computer (Windows) so hopefully this code replaces what the notes achieves. If it does not, I also have included the function that is in the notes to get the RE24 matrix

cl <- makeCluster(detectCores() - 2)  
registerDoParallel(cl)

RNGkind(kind = "L'Ecuyer-CMRG")

system.time({
  RE_bat <- foreach(j = 1:24, .combine = 'c') %dopar% RUNS.j(j) %>% 
    unlist() %>%
    matrix(nrow = 8, ncol = 3, byrow = TRUE, 
           dimnames = list(c("000","001","010","011",
                             "100","101","110","111"),
                           c("0 outs", "1 out", "2 outs")))
})
##    user  system elapsed 
##    0.06    0.00   33.66
stopCluster(cl)
round(RE_bat, 2)
##     0 outs 1 out 2 outs
## 000   0.50  0.25   0.09
## 001   1.34  0.97   0.39
## 010   1.14  0.67   0.32
## 011   2.01  1.48   0.58
## 100   0.83  0.49   0.20
## 101   1.76  1.23   0.47
## 110   1.51  0.89   0.38
## 111   2.52  1.52   0.66
#library(doMC)
#library(parallel)
#registerDoMC(cores=detectCores()-2)
#RNGkind(kind = "L'Ecuyer-CMRG")
#system.time({
#  RE_bat = foreach(j = 1:24) %dopar% RUNS.j(j) %>% 
#    unlist() %>%
#    matrix(nrow = 8, ncol = 3, byrow = TRUE, 
#           dimnames =list(c("000","001","010","011",
#                          "100","101","110","111"),
#                          c("0 outs", "1 out", "2 outs")))
#})
#round(RE_bat, 2)

Question 3 Problem 5 in Section 5.11 of Analyzing Baseball Data with R. Suppose one is interested in studying how runners move with a single.

singles <- dat2016_updated%>%
  filter(EVENT_CD == 20)
singles
singles <- singles %>%
  group_by(STATE, NEW.STATE)%>%
  summarize(frequency = n(), .groups = "drop")
singles
singles_1B <- singles %>%
  filter(STATE %in% c("100 0", "100 1", "100 2"))

singles_1B_compare <- singles_1B %>%
  mutate(
    running_state = case_when(
      NEW.STATE %in% c("101 0", "101 1", "101 2") ~ "Third",
      NEW.STATE %in% c("110 0", "110 1", "110 2") ~ "Second",
      TRUE ~ NA 
    )
  ) %>%
  drop_na(running_state) %>%  
  group_by(running_state) %>%
  summarise(proportion = sum(frequency) / sum(singles_1B$frequency))  

singles_1B_compare
#the lead runner on first base is more likely to move to second when a single is hit
singles_B <- singles %>%
  filter(STATE %in% c("110 0", "110 1", "110 2"))
singles_B
singles_run_scored <- singles_B%>%
  mutate(run_scored = case_when(
    STATE == "110 0" & NEW.STATE == "001 0" ~ 2,
    STATE == "110 0" & NEW.STATE == "001 1" ~ 1,
    STATE == "110 0" & NEW.STATE == "010 0" ~ 2,
    STATE == "110 0" & NEW.STATE == "010 1" ~ 1,
    STATE == "110 0" & NEW.STATE == "011 0" ~ 1,
    STATE == "110 0" & NEW.STATE == "011 1" ~ 0,
    STATE == "110 0" & NEW.STATE == "100 1" ~ 1,
    STATE == "110 0" & NEW.STATE == "101 0" ~ 1,
    STATE == "110 0" & NEW.STATE == "110 0" ~ 1,
    STATE == "110 0" & NEW.STATE == "110 1" ~ 0,
    STATE == "110 0" & NEW.STATE == "111 0" ~ 0,
    STATE == "110 1" & NEW.STATE == "001 1" ~ 2,
    STATE == "110 1" & NEW.STATE == "001 2" ~ 1,
    STATE == "110 1" & NEW.STATE == "010 1" ~ 2,
    STATE == "110 1" & NEW.STATE == "010 2" ~ 1,
    STATE == "110 1" & NEW.STATE == "011 1" ~ 1,
    STATE == "110 1" & NEW.STATE == "011 2" ~ 0,
    STATE == "110 1" & NEW.STATE == "100 1" ~ 2,
    STATE == "110 1" & NEW.STATE == "100 2" ~ 1,
    STATE == "110 1" & NEW.STATE == "101 1" ~ 1,
    STATE == "110 1" & NEW.STATE == "101 2" ~ 0,
    STATE == "110 1" & NEW.STATE == "110 1" ~ 1,
    STATE == "110 1" & NEW.STATE == "110 2" ~ 0,
    STATE == "110 1" & NEW.STATE == "111 1" ~ 0,
    STATE == "110 2" & NEW.STATE == "001 2" ~ 2,
    STATE == "110 2" & NEW.STATE == "001 3" ~ 1,
    STATE == "110 2" & NEW.STATE == "010 2" ~ 2,
    STATE == "110 2" & NEW.STATE == "011 2" ~ 0,
    STATE == "110 2" & NEW.STATE == "011 3" ~ 1,
    STATE == "110 2" & NEW.STATE == "100 2" ~ 2,
    STATE == "110 2" & NEW.STATE == "100 3" ~ 1,
    STATE == "110 2" & NEW.STATE == "101 2" ~ 1,
    STATE == "110 2" & NEW.STATE == "101 3" ~ 0,
    STATE == "110 2" & NEW.STATE == "110 2" ~ 1,
    STATE == "110 2" & NEW.STATE == "110 3" ~ 0,
    STATE == "110 2" & NEW.STATE == "111 2" ~ 0,
    TRUE ~ NA
  )
)
num_plays_with_runs0  <- singles_run_scored%>%
  filter(run_scored == 0) %>%
  nrow()

num_plays_with_runs1  <- singles_run_scored%>%
  filter(run_scored == 1) %>%
  nrow()

num_plays_with_runs2  <- singles_run_scored%>%
  filter(run_scored == 2) %>%
  nrow()
  

total_plays <- nrow(singles_run_scored)

probability_run_scored0 <- num_plays_with_runs0 / total_plays

probability_run_scored1 <- num_plays_with_runs1 / total_plays

probability_run_scored2 <- num_plays_with_runs2 / total_plays

probability_singles_score <- data.frame(
  run_scenario = c("run_scored0", "run_scored1", "run_scored2"),
  probability = c(probability_run_scored0, probability_run_scored1, probability_run_scored2)
)
probability_singles_score
#probability that 0, 1, or 2 runs are scored

Question 4 Problem 1 in Section 9.5 of Analyzing Baseball Data with R.

P <- matrix(c(.3, .7,  0,  0, 
               0, .3, .7,  0,
               0,  0, .3, .7, 
               0,  0,  0,  1), 4, 4, byrow = TRUE)
P2 <- P %*% P

colnames(P2) <- c("0 outs", "1 outs", "2 outs", "3 outs")
rownames(P2) <- c("0 outs", "1 outs", "2 outs", "3 outs")
P2  
##        0 outs 1 outs 2 outs 3 outs
## 0 outs   0.09   0.42   0.49   0.00
## 1 outs   0.00   0.09   0.42   0.49
## 2 outs   0.00   0.00   0.09   0.91
## 3 outs   0.00   0.00   0.00   1.00
prob_0_to_1 <- P2[1, 2]
prob_0_to_1
## [1] 0.42
#the probability of moving from 0 outs to 1 out after two plate appearances in 0.42

N <- solve(diag(c(1, 1, 1)) - P[-4, -4])
colnames(N) <- c("0 outs", "1 outs", "2 outs")
rownames(N) <- c("0 outs", "1 outs", "2 outs")
N
##          0 outs   1 outs   2 outs
## 0 outs 1.428571 1.428571 1.428571
## 1 outs 0.000000 1.428571 1.428571
## 2 outs 0.000000 0.000000 1.428571
average_PA_innings_states <- rowSums(N)
average_PA_innings_states
##   0 outs   1 outs   2 outs 
## 4.285714 2.857143 1.428571
# the average number of plate appearances at each state in one inning is 4.29 at 0 out, 2.86 at 1 outs, and 1.43 at 2 outs
average_PA_innings <- sum(rowSums(N))
average_PA_innings
## [1] 8.571429
# the average number of plate appearances in a total inning is 8.571429

Question 5 Problem 3 in Section 9.5 of Analyzing Baseball Data with R. In Section 9.2.4, the expected number of runs as calculated for each one of the 24 possible runners-outs situations using data from the 2016 season. To see how these values can change across seasons, download play-by-play data from Retrosheet for the 1968 season, construct the probability transition matrix, simulate 10,000 half-innings from each of the 24 situations, and compute the run expectancy matrix. Compare this 1968 run expectancy matrix with the one computed using 2016 data.

dat1968 <- read_csv("all1968.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 123586 Columns: 98
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (33): game_id, away_team_id, pitch_seq_tx, bat_id, bat_hand_cd, resp_bat...
## dbl (37): inn_ct, bat_home_id, outs_ct, balls_ct, strikes_ct, away_score_ct,...
## lgl (28): leadoff_fl, ph_fl, bat_event_fl, ab_fl, sh_fl, sf_fl, dp_fl, tp_fl...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#fixing the NA in all of the bases and states
dat1968 <- dat1968 %>%
  mutate(half_inning_id = paste(game_id, inn_ct, bat_home_id))%>%
  retrosheet_add_states() %>%
  mutate(
    bases = case_when(
      bases == "NANANA" ~ "000",
      bases == "1NANA"  ~ "100",
      bases == "11NA"   ~ "110",
      bases == "1NA1"   ~ "101",
      bases == "NA1NA"  ~ "010",
      bases == "NANA1"  ~ "001",
      bases == "NA11"   ~ "011",
      TRUE ~ bases  
    )
  )%>%
  mutate(
    state = case_when(
      state == "NANANA 0" ~ "000 0",
      state == "NANANA 1" ~ "000 1",
      state == "1NANA 1" ~ "100 1",
      state == "NANANA 2" ~ "000 2",
      state == "NA1NA 2" ~ "010 2",
      state == "11NA 2" ~ "110 2",
      state == "NA1NA 0" ~ "010 0",
      state == "11NA 0" ~ "110 0",
      state == "11NA 1" ~ "110 1",
      state == "1NA1 2" ~ "101 2",
      state == "1NANA 2" ~ "100 2",
      state == "1NANA 0" ~ "100 0",
      state == "1NA1 0" ~ "101 0",
      state == "NANA1 2" ~ "001 2",
      state == "NA1NA 1" ~ "010 1",
      state == "NA11 1" ~ "011 1",
      state == "1NA1 1" ~ "101 1",
      state == "NA11 2" ~ "011 2",
      state == "NANA1 0" ~ "001 0",
      state == "NANA1 1" ~ "001 1",
      state == "NA11 0" ~ "011 0", 
      TRUE ~ state
    )
  )

#creating half innings

half_innings_1968 <- dat1968 %>%
  mutate(
    runs = away_score_ct + home_score_ct,
    half_inning_id = paste(game_id, inn_ct, bat_home_id)
  ) %>%
  group_by(half_inning_id) %>%
  summarize(
    outs_inning = sum(event_outs_ct), 
    runs_inning = sum(runs_scored),
    runs_start = first(runs),
    max_runs = runs_inning + runs_start
  )
dat1968_complete <- dat1968 %>%
  inner_join(half_innings_1968, by = "half_inning_id") %>%
  mutate(runs_roi = max_runs - runs_start)
  
dat1968_complete <- dat1968_complete %>%
  mutate(new_state = str_replace(new_state, "[0-1]{3} 3", "3"))
#creating transition probability matrix

T_matrix_1968 <- dat1968_complete %>%
  select(state, new_state) %>%
  table()
T_matrix_1968
##        new_state
## state   000 0 000 1 000 2 001 0 001 1 001 2 010 0 010 1 010 2 011 0 011 1 011 2
##   000 0   558 20789     0   204     0     0  1124     0     0     0     0     0
##   000 1     0   366 15402     0   132     0     0   774     0     0     0     0
##   000 2     0     0   309     0     0   122     0     0   624     0     0     0
##   001 0    20    63     5     5   234     0    11     0     0     0     0     0
##   001 1     0    55   197     0     5   439     0    42    13     0     1     0
##   001 2     0     0    56     0     0     8     0     0    51     0     0     0
##   010 0    21    24    26   123   405     0    66   760     0    13     0     0
##   010 1     0    56    47     0   171   508     0   141  1755     0     9     0
##   010 2     0     0    60     0     0   141     0     0   189     0     0     2
##   011 0     5     2     0    13    34     6     7    29     4     4   142     0
##   011 1     0     7     4     0    22    79     0    19    72     0     7   266
##   011 2     0     0    12     0     0    15     0     0    43     0     0     2
##   100 0   116   250   756    90    22     0   635  1154     0   186     0     0
##   100 1     0   150   285     0    81    19     0   686   832     0   220     0
##   100 2     0     0   132     0     0   101     0     0   640     0     0   157
##   101 0    16     2    47    10     7    12    18    36     4    62    21     0
##   101 1     0    13     1     0    20    18     0    33   100     0    80    30
##   101 2     0     0    19     0     0    15     0     0    67     0     0   112
##   110 0    14     2     0    14     4   101    15    11    23    85   189     0
##   110 1     0    53     2     0    13     7     0    38    16     0   188   222
##   110 2     0     0    66     0     0    29     0     0    65     0     0   154
##   111 0     6     0     0     1     2    22     3     2     3    16    15    15
##   111 1     0     9     2     0     6     3     0    11     7     0    40    40
##   111 2     0     0    22     0     0     8     0     0    18     0     0    42
##        new_state
## state   100 0 100 1 100 2 101 0 101 1 101 2 110 0 110 1 110 2 111 0 111 1 111 2
##   000 0  7108     0     0     0     0     0     0     0     0     0     0     0
##   000 1     0  5156     0     0     0     0     0     0     0     0     0     0
##   000 2     0     0  4298     0     0     0     0     0     0     0     0     0
##   001 0    67     5     0    49     0     0     0     0     0     0     0     0
##   001 1     0   194    47     0   144     0     0     0     0     0     0     0
##   001 2     0     0   231     0     0   253     0     0     0     0     0     0
##   010 0    80    31     0   167     0     0   163     0     0     0     0     0
##   010 1     0   255    48     0   147     0     0   552     0     0     0     0
##   010 2     0     0   415     0     0   132     0     0   745     0     0     0
##   011 0    22     2     0    18     4     0     1     3     0    69     0     0
##   011 1     0    49     2     0    48    25     0     7     8     0   284     0
##   011 2     0     0    83     0     0    16     0     0     2     0     0   271
##   100 0     8  2597     0   384     0     0  1088     0     0     0     0     0
##   100 1     0    14  3378     0   448     0     0  1352     0     0     0     0
##   100 2     0     0     8     0     0   464     0     0  1372     0     0     0
##   101 0     2   116     4    38   180     0    93    20     0    50     0     0
##   101 1     0     3   224     0    67   292     0   164    31     0   105     0
##   101 2     0     0     8     0     0    80     0     0   192     0     0   144
##   110 0     1    18    19    75   125     0    57   473     0   200     0     0
##   110 1     0     3    26     0   124   235     0   174  1007     0   322     0
##   110 2     0     0     4     0     0   165     0     0   204     0     0   368
##   111 0     0     1     4     7    21     2    34    28     3    56   127     0
##   111 1     0     0     7     0    47    84     0    50    74     0   114   293
##   111 2     0     0     0     0     0    58     0     0    66     0     0   114
##        new_state
## state       3
##   000 0     0
##   000 1     0
##   000 2 12086
##   001 0     0
##   001 1    18
##   001 2  1051
##   010 0     0
##   010 1    34
##   010 2  2835
##   011 0     0
##   011 1    11
##   011 2   598
##   100 0     0
##   100 1   973
##   100 2  5921
##   101 0     0
##   101 1   173
##   101 2  1167
##   110 0     6
##   110 1   374
##   110 2  2632
##   111 0     1
##   111 1   156
##   111 2   854
P_matrix_1968 <- prop.table(T_matrix_1968, 1)

P_matrix_1968 <- P_matrix_1968 %>%
  rbind("3" = c(rep(0, 24), 1))

P_matrix_1968 %>%
  apply(MARGIN = 1, FUN = sum)
## 000 0 000 1 000 2 001 0 001 1 001 2 010 0 010 1 010 2 011 0 011 1 011 2 100 0 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## 100 1 100 2 101 0 101 1 101 2 110 0 110 1 110 2 111 0 111 1 111 2     3 
##     1     1     1     1     1     1     1     1     1     1     1     1
P_matrix_1968
##             000 0       000 1        000 2       001 0       001 1       001 2
## 000 0 0.018735520 0.698015647 0.0000000000 0.006849545 0.000000000 0.000000000
## 000 1 0.000000000 0.016765918 0.7055428310 0.000000000 0.006046725 0.000000000
## 000 2 0.000000000 0.000000000 0.0177189059 0.000000000 0.000000000 0.006995814
## 001 0 0.043572985 0.137254902 0.0108932462 0.010893246 0.509803922 0.000000000
## 001 1 0.000000000 0.047619048 0.1705627706 0.000000000 0.004329004 0.380086580
## 001 2 0.000000000 0.000000000 0.0339393939 0.000000000 0.000000000 0.004848485
## 010 0 0.011176158 0.012772751 0.0138371474 0.065460351 0.215540181 0.000000000
## 010 1 0.000000000 0.015041633 0.0126242278 0.000000000 0.045930701 0.136449100
## 010 2 0.000000000 0.000000000 0.0132772737 0.000000000 0.000000000 0.031201593
## 011 0 0.013698630 0.005479452 0.0000000000 0.035616438 0.093150685 0.016438356
## 011 1 0.000000000 0.007692308 0.0043956044 0.000000000 0.024175824 0.086813187
## 011 2 0.000000000 0.000000000 0.0115163148 0.000000000 0.000000000 0.014395393
## 100 0 0.015920944 0.034312380 0.1037606368 0.012352457 0.003019489 0.000000000
## 100 1 0.000000000 0.017776724 0.0337757763 0.000000000 0.009599431 0.002251718
## 100 2 0.000000000 0.000000000 0.0150085276 0.000000000 0.000000000 0.011483798
## 101 0 0.021680217 0.002710027 0.0636856369 0.013550136 0.009485095 0.016260163
## 101 1 0.000000000 0.009601182 0.0007385524 0.000000000 0.014771049 0.013293944
## 101 2 0.000000000 0.000000000 0.0105321508 0.000000000 0.000000000 0.008314856
## 110 0 0.009776536 0.001396648 0.0000000000 0.009776536 0.002793296 0.070530726
## 110 1 0.000000000 0.018901569 0.0007132668 0.000000000 0.004636234 0.002496434
## 110 2 0.000000000 0.000000000 0.0179007323 0.000000000 0.000000000 0.007865473
## 111 0 0.016260163 0.000000000 0.0000000000 0.002710027 0.005420054 0.059620596
## 111 1 0.000000000 0.009544008 0.0021208908 0.000000000 0.006362672 0.003181336
## 111 2 0.000000000 0.000000000 0.0186125212 0.000000000 0.000000000 0.006768190
## 3     0.000000000 0.000000000 0.0000000000 0.000000000 0.000000000 0.000000000
##             010 0       010 1       010 2       011 0        011 1        011 2
## 000 0 0.037739650 0.000000000 0.000000000 0.000000000 0.0000000000 0.0000000000
## 000 1 0.000000000 0.035455795 0.000000000 0.000000000 0.0000000000 0.0000000000
## 000 2 0.000000000 0.000000000 0.035781868 0.000000000 0.0000000000 0.0000000000
## 001 0 0.023965142 0.000000000 0.000000000 0.000000000 0.0000000000 0.0000000000
## 001 1 0.000000000 0.036363636 0.011255411 0.000000000 0.0008658009 0.0000000000
## 001 2 0.000000000 0.000000000 0.030909091 0.000000000 0.0000000000 0.0000000000
## 010 0 0.035125067 0.404470463 0.000000000 0.006918574 0.0000000000 0.0000000000
## 010 1 0.000000000 0.037872683 0.471394037 0.000000000 0.0024174053 0.0000000000
## 010 2 0.000000000 0.000000000 0.041823412 0.000000000 0.0000000000 0.0004425758
## 011 0 0.019178082 0.079452055 0.010958904 0.010958904 0.3890410959 0.0000000000
## 011 1 0.000000000 0.020879121 0.079120879 0.000000000 0.0076923077 0.2923076923
## 011 2 0.000000000 0.000000000 0.041266795 0.000000000 0.0000000000 0.0019193858
## 100 0 0.087153445 0.158385946 0.000000000 0.025528411 0.0000000000 0.0000000000
## 100 1 0.000000000 0.081298886 0.098601564 0.000000000 0.0260725290 0.0000000000
## 100 2 0.000000000 0.000000000 0.072768619 0.000000000 0.0000000000 0.0178510517
## 101 0 0.024390244 0.048780488 0.005420054 0.084010840 0.0284552846 0.0000000000
## 101 1 0.000000000 0.024372230 0.073855244 0.000000000 0.0590841950 0.0221565731
## 101 2 0.000000000 0.000000000 0.037139690 0.000000000 0.0000000000 0.0620842572
## 110 0 0.010474860 0.007681564 0.016061453 0.059357542 0.1319832402 0.0000000000
## 110 1 0.000000000 0.013552068 0.005706134 0.000000000 0.0670470756 0.0791726106
## 110 2 0.000000000 0.000000000 0.017629509 0.000000000 0.0000000000 0.0417683754
## 111 0 0.008130081 0.005420054 0.008130081 0.043360434 0.0406504065 0.0406504065
## 111 1 0.000000000 0.011664899 0.007423118 0.000000000 0.0424178155 0.0424178155
## 111 2 0.000000000 0.000000000 0.015228426 0.000000000 0.0000000000 0.0355329949
## 3     0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.0000000000
##             100 0       100 1        100 2      101 0      101 1       101 2
## 000 0 0.238659638 0.000000000 0.0000000000 0.00000000 0.00000000 0.000000000
## 000 1 0.000000000 0.236188731 0.0000000000 0.00000000 0.00000000 0.000000000
## 000 2 0.000000000 0.000000000 0.2464590860 0.00000000 0.00000000 0.000000000
## 001 0 0.145969499 0.010893246 0.0000000000 0.10675381 0.00000000 0.000000000
## 001 1 0.000000000 0.167965368 0.0406926407 0.00000000 0.12467532 0.000000000
## 001 2 0.000000000 0.000000000 0.1400000000 0.00000000 0.00000000 0.153333333
## 010 0 0.042575838 0.016498137 0.0000000000 0.08887706 0.00000000 0.000000000
## 010 1 0.000000000 0.068493151 0.0128928284 0.00000000 0.03948429 0.000000000
## 010 2 0.000000000 0.000000000 0.0918344767 0.00000000 0.00000000 0.029210002
## 011 0 0.060273973 0.005479452 0.0000000000 0.04931507 0.01095890 0.000000000
## 011 1 0.000000000 0.053846154 0.0021978022 0.00000000 0.05274725 0.027472527
## 011 2 0.000000000 0.000000000 0.0796545106 0.00000000 0.00000000 0.015355086
## 100 0 0.001097996 0.356437002 0.0000000000 0.05270382 0.00000000 0.000000000
## 100 1 0.000000000 0.001659161 0.4003318322 0.00000000 0.05309315 0.000000000
## 100 2 0.000000000 0.000000000 0.0009096077 0.00000000 0.00000000 0.052757248
## 101 0 0.002710027 0.157181572 0.0054200542 0.05149051 0.24390244 0.000000000
## 101 1 0.000000000 0.002215657 0.1654357459 0.00000000 0.04948301 0.215657312
## 101 2 0.000000000 0.000000000 0.0044345898 0.00000000 0.00000000 0.044345898
## 110 0 0.000698324 0.012569832 0.0132681564 0.05237430 0.08729050 0.000000000
## 110 1 0.000000000 0.001069900 0.0092724679 0.00000000 0.04422254 0.083808845
## 110 2 0.000000000 0.000000000 0.0010848929 0.00000000 0.00000000 0.044751831
## 111 0 0.000000000 0.002710027 0.0108401084 0.01897019 0.05691057 0.005420054
## 111 1 0.000000000 0.000000000 0.0074231177 0.00000000 0.04984093 0.089077413
## 111 2 0.000000000 0.000000000 0.0000000000 0.00000000 0.00000000 0.049069374
## 3     0.000000000 0.000000000 0.0000000000 0.00000000 0.00000000 0.000000000
##             110 0       110 1       110 2      111 0      111 1      111 2
## 000 0 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 000 1 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 000 2 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 001 0 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 001 1 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 001 2 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 010 0 0.086748270 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 010 1 0.000000000 0.148267526 0.000000000 0.00000000 0.00000000 0.00000000
## 010 2 0.000000000 0.000000000 0.164859482 0.00000000 0.00000000 0.00000000
## 011 0 0.002739726 0.008219178 0.000000000 0.18904110 0.00000000 0.00000000
## 011 1 0.000000000 0.007692308 0.008791209 0.00000000 0.31208791 0.00000000
## 011 2 0.000000000 0.000000000 0.001919386 0.00000000 0.00000000 0.26007678
## 100 0 0.149327477 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
## 100 1 0.000000000 0.160227542 0.000000000 0.00000000 0.00000000 0.00000000
## 100 2 0.000000000 0.000000000 0.155997726 0.00000000 0.00000000 0.00000000
## 101 0 0.126016260 0.027100271 0.000000000 0.06775068 0.00000000 0.00000000
## 101 1 0.000000000 0.121122600 0.022895126 0.00000000 0.07754801 0.00000000
## 101 2 0.000000000 0.000000000 0.106430155 0.00000000 0.00000000 0.07982262
## 110 0 0.039804469 0.330307263 0.000000000 0.13966480 0.00000000 0.00000000
## 110 1 0.000000000 0.062054208 0.359129815 0.00000000 0.11483595 0.00000000
## 110 2 0.000000000 0.000000000 0.055329536 0.00000000 0.00000000 0.09981014
## 111 0 0.092140921 0.075880759 0.008130081 0.15176152 0.34417344 0.00000000
## 111 1 0.000000000 0.053022269 0.078472959 0.00000000 0.12089077 0.31071050
## 111 2 0.000000000 0.000000000 0.055837563 0.00000000 0.00000000 0.09644670
## 3     0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
##                 3
## 000 0 0.000000000
## 000 1 0.000000000
## 000 2 0.693044326
## 001 0 0.000000000
## 001 1 0.015584416
## 001 2 0.636969697
## 010 0 0.000000000
## 010 1 0.009132420
## 010 2 0.627351184
## 011 0 0.000000000
## 011 1 0.012087912
## 011 2 0.573896353
## 100 0 0.000000000
## 100 1 0.115311685
## 100 2 0.673223422
## 101 0 0.000000000
## 101 1 0.127769572
## 101 2 0.646895787
## 110 0 0.004189944
## 110 1 0.133380884
## 110 2 0.713859506
## 111 0 0.002710027
## 111 1 0.165429480
## 111 2 0.722504230
## 3     1.000000000
round(P_matrix_1968, 2)
##       000 0 000 1 000 2 001 0 001 1 001 2 010 0 010 1 010 2 011 0 011 1 011 2
## 000 0  0.02  0.70  0.00  0.01  0.00  0.00  0.04  0.00  0.00  0.00  0.00  0.00
## 000 1  0.00  0.02  0.71  0.00  0.01  0.00  0.00  0.04  0.00  0.00  0.00  0.00
## 000 2  0.00  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.04  0.00  0.00  0.00
## 001 0  0.04  0.14  0.01  0.01  0.51  0.00  0.02  0.00  0.00  0.00  0.00  0.00
## 001 1  0.00  0.05  0.17  0.00  0.00  0.38  0.00  0.04  0.01  0.00  0.00  0.00
## 001 2  0.00  0.00  0.03  0.00  0.00  0.00  0.00  0.00  0.03  0.00  0.00  0.00
## 010 0  0.01  0.01  0.01  0.07  0.22  0.00  0.04  0.40  0.00  0.01  0.00  0.00
## 010 1  0.00  0.02  0.01  0.00  0.05  0.14  0.00  0.04  0.47  0.00  0.00  0.00
## 010 2  0.00  0.00  0.01  0.00  0.00  0.03  0.00  0.00  0.04  0.00  0.00  0.00
## 011 0  0.01  0.01  0.00  0.04  0.09  0.02  0.02  0.08  0.01  0.01  0.39  0.00
## 011 1  0.00  0.01  0.00  0.00  0.02  0.09  0.00  0.02  0.08  0.00  0.01  0.29
## 011 2  0.00  0.00  0.01  0.00  0.00  0.01  0.00  0.00  0.04  0.00  0.00  0.00
## 100 0  0.02  0.03  0.10  0.01  0.00  0.00  0.09  0.16  0.00  0.03  0.00  0.00
## 100 1  0.00  0.02  0.03  0.00  0.01  0.00  0.00  0.08  0.10  0.00  0.03  0.00
## 100 2  0.00  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.07  0.00  0.00  0.02
## 101 0  0.02  0.00  0.06  0.01  0.01  0.02  0.02  0.05  0.01  0.08  0.03  0.00
## 101 1  0.00  0.01  0.00  0.00  0.01  0.01  0.00  0.02  0.07  0.00  0.06  0.02
## 101 2  0.00  0.00  0.01  0.00  0.00  0.01  0.00  0.00  0.04  0.00  0.00  0.06
## 110 0  0.01  0.00  0.00  0.01  0.00  0.07  0.01  0.01  0.02  0.06  0.13  0.00
## 110 1  0.00  0.02  0.00  0.00  0.00  0.00  0.00  0.01  0.01  0.00  0.07  0.08
## 110 2  0.00  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.02  0.00  0.00  0.04
## 111 0  0.02  0.00  0.00  0.00  0.01  0.06  0.01  0.01  0.01  0.04  0.04  0.04
## 111 1  0.00  0.01  0.00  0.00  0.01  0.00  0.00  0.01  0.01  0.00  0.04  0.04
## 111 2  0.00  0.00  0.02  0.00  0.00  0.01  0.00  0.00  0.02  0.00  0.00  0.04
## 3      0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
##       100 0 100 1 100 2 101 0 101 1 101 2 110 0 110 1 110 2 111 0 111 1 111 2
## 000 0  0.24  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 000 1  0.00  0.24  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 000 2  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 001 0  0.15  0.01  0.00  0.11  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 001 1  0.00  0.17  0.04  0.00  0.12  0.00  0.00  0.00  0.00  0.00  0.00  0.00
## 001 2  0.00  0.00  0.14  0.00  0.00  0.15  0.00  0.00  0.00  0.00  0.00  0.00
## 010 0  0.04  0.02  0.00  0.09  0.00  0.00  0.09  0.00  0.00  0.00  0.00  0.00
## 010 1  0.00  0.07  0.01  0.00  0.04  0.00  0.00  0.15  0.00  0.00  0.00  0.00
## 010 2  0.00  0.00  0.09  0.00  0.00  0.03  0.00  0.00  0.16  0.00  0.00  0.00
## 011 0  0.06  0.01  0.00  0.05  0.01  0.00  0.00  0.01  0.00  0.19  0.00  0.00
## 011 1  0.00  0.05  0.00  0.00  0.05  0.03  0.00  0.01  0.01  0.00  0.31  0.00
## 011 2  0.00  0.00  0.08  0.00  0.00  0.02  0.00  0.00  0.00  0.00  0.00  0.26
## 100 0  0.00  0.36  0.00  0.05  0.00  0.00  0.15  0.00  0.00  0.00  0.00  0.00
## 100 1  0.00  0.00  0.40  0.00  0.05  0.00  0.00  0.16  0.00  0.00  0.00  0.00
## 100 2  0.00  0.00  0.00  0.00  0.00  0.05  0.00  0.00  0.16  0.00  0.00  0.00
## 101 0  0.00  0.16  0.01  0.05  0.24  0.00  0.13  0.03  0.00  0.07  0.00  0.00
## 101 1  0.00  0.00  0.17  0.00  0.05  0.22  0.00  0.12  0.02  0.00  0.08  0.00
## 101 2  0.00  0.00  0.00  0.00  0.00  0.04  0.00  0.00  0.11  0.00  0.00  0.08
## 110 0  0.00  0.01  0.01  0.05  0.09  0.00  0.04  0.33  0.00  0.14  0.00  0.00
## 110 1  0.00  0.00  0.01  0.00  0.04  0.08  0.00  0.06  0.36  0.00  0.11  0.00
## 110 2  0.00  0.00  0.00  0.00  0.00  0.04  0.00  0.00  0.06  0.00  0.00  0.10
## 111 0  0.00  0.00  0.01  0.02  0.06  0.01  0.09  0.08  0.01  0.15  0.34  0.00
## 111 1  0.00  0.00  0.01  0.00  0.05  0.09  0.00  0.05  0.08  0.00  0.12  0.31
## 111 2  0.00  0.00  0.00  0.00  0.00  0.05  0.00  0.00  0.06  0.00  0.00  0.10
## 3      0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00
##          3
## 000 0 0.00
## 000 1 0.00
## 000 2 0.69
## 001 0 0.00
## 001 1 0.02
## 001 2 0.64
## 010 0 0.00
## 010 1 0.01
## 010 2 0.63
## 011 0 0.00
## 011 1 0.01
## 011 2 0.57
## 100 0 0.00
## 100 1 0.12
## 100 2 0.67
## 101 0 0.00
## 101 1 0.13
## 101 2 0.65
## 110 0 0.00
## 110 1 0.13
## 110 2 0.71
## 111 0 0.00
## 111 1 0.17
## 111 2 0.72
## 3     1.00
num_havent_scored <- function(s) {
  s %>%
    str_split("") %>%
    pluck(1) %>%
    as.numeric() %>%
    sum(na.rm = TRUE)
}

runners_out <- T_matrix_1968 %>%
  row.names() %>%
  set_names() %>%
  map_int(num_havent_scored)

R_runs <- outer(
  runners_out + 1, 
  runners_out, 
  FUN = "-"
) %>%
  cbind("3" = rep(0, 24))

simulate_half_inning <- function(P, R, start = 1) {
  s <- start
  path <- NULL
  runs <- 0
  while (s < 25) {
    s_new <- sample(1:25, size = 1, prob = P[s, ])
    path <- c(path, s_new)
    runs <- runs + R[s, s_new]
    s <- s_new
  }
  runs
}

set.seed(111653)
simulated_runs <- 1:10000 %>%
  map_int(~simulate_half_inning(T_matrix_1968, R_runs))

table(simulated_runs)
## simulated_runs
##    0    1    2    3    4    5    6    7    8    9   10 
## 7250 1529  686  307  136   49   28    7    4    3    1
mean(simulated_runs)
## [1] 0.4897
#Over 10,000 half innings, an average of 0.5097 runs were scored; in the 2016 season, it was an average of 0.477
#Creating run expectancy matrix
dat1968_re <- dat1968_complete %>%
  group_by(bases, outs_ct) %>%
  summarise(mean_run_value = mean(runs_roi), .groups = "drop") %>%
  pivot_wider(
    names_from = outs_ct, 
    values_from = mean_run_value,
    names_prefix = "Outs="
  ) %>%
  column_to_rownames("bases") %>%  
  as.matrix()  

dat1968_re <- round(dat1968_re, 2)

colnames(dat1968_re) <- c("0 outs", "1 out", "2 outs")
dat1968_re
##     0 outs 1 out 2 outs
## 000   0.41  0.29   0.27
## 001   1.45  1.24   0.96
## 010   1.10  0.90   0.81
## 011   2.16  1.75   1.33
## 100   0.79  0.60   0.53
## 101   1.87  1.58   1.18
## 110   1.47  1.19   1.00
## 111   2.38  1.91   1.42
RUNS_out_2016
##     0 outs 1 out 2 outs
## 000   0.50  0.27   0.11
## 001   1.35  0.94   0.37
## 010   1.13  0.67   0.31
## 011   1.93  1.36   0.55
## 100   0.86  0.51   0.22
## 101   1.72  1.20   0.48
## 110   1.44  0.92   0.41
## 111   2.11  1.54   0.70